data_url = "https://archive.ics.uci.edu/ml/machine-learning-databases/autos/imports-85.data"
library(data.table)
library(ggplot2)
library(stats)
library(tigerstats)
## Loading required package: abd
## Loading required package: nlme
## Loading required package: lattice
## Loading required package: grid
## Loading required package: mosaic
## Registered S3 method overwritten by 'mosaic':
## method from
## fortify.SpatialPolygonsDataFrame ggplot2
##
## The 'mosaic' package masks several functions from core packages in order to add
## additional features. The original behavior of these functions should not be affected by this.
##
## Attaching package: 'mosaic'
## The following objects are masked from 'package:dplyr':
##
## count, do, tally
## The following object is masked from 'package:Matrix':
##
## mean
## The following object is masked from 'package:ggplot2':
##
## stat
## The following objects are masked from 'package:stats':
##
## binom.test, cor, cor.test, cov, fivenum, IQR, median, prop.test,
## quantile, sd, t.test, var
## The following objects are masked from 'package:base':
##
## max, mean, min, prod, range, sample, sum
## Welcome to tigerstats!
## To learn more about this package, consult its website:
## http://homerhanumat.github.io/tigerstats
library(ggplot2)
library(corrplot)
## corrplot 0.84 loaded
library(dplyr)
library(caret)
##
## Attaching package: 'caret'
## The following object is masked from 'package:mosaic':
##
## dotPlot
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following objects are masked from 'package:mosaic':
##
## deltaMethod, logit
## The following object is masked from 'package:dplyr':
##
## recode
library(Metrics)
##
## Attaching package: 'Metrics'
## The following objects are masked from 'package:caret':
##
## precision, recall
#
# Importing the data into the R environment
#
# Read the comma-separated file at data_url into a data.table and display its
# first few rows. The columns come back with the default names V1..V26 (see the
# names(data) output further down).
data = fread(data_url, sep=",")
head(data)
## V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13
## 1: 3 ? alfa-romero gas std two convertible rwd front 88.6 168.8 64.1 48.8
## 2: 3 ? alfa-romero gas std two convertible rwd front 88.6 168.8 64.1 48.8
## 3: 1 ? alfa-romero gas std two hatchback rwd front 94.5 171.2 65.5 52.4
## 4: 2 164 audi gas std four sedan fwd front 99.8 176.6 66.2 54.3
## 5: 2 164 audi gas std four sedan 4wd front 99.4 176.6 66.4 54.3
## 6: 2 ? audi gas std two sedan fwd front 99.8 177.3 66.3 53.1
## V14 V15 V16 V17 V18 V19 V20 V21 V22 V23 V24 V25 V26
## 1: 2548 dohc four 130 mpfi 3.47 2.68 9.0 111 5000 21 27 13495
## 2: 2548 dohc four 130 mpfi 3.47 2.68 9.0 111 5000 21 27 16500
## 3: 2823 ohcv six 152 mpfi 2.68 3.47 9.0 154 5000 19 26 16500
## 4: 2337 ohc four 109 mpfi 3.19 3.40 10.0 102 5500 24 30 13950
## 5: 2824 ohc five 136 mpfi 3.19 3.40 8.0 115 5500 18 22 17450
## 6: 2507 ohc five 136 mpfi 3.19 3.40 8.5 110 5500 19 25 15250
# Structure of data: the type of every column plus a preview of its values.
# Some numeric-looking columns (e.g. V2) are read as chr; V2 visibly contains
# the "?" marker, and the others presumably do too, since coercing them later
# introduces NAs.
str(data)
## Classes 'data.table' and 'data.frame': 205 obs. of 26 variables:
## $ V1 : int 3 3 1 2 2 2 1 1 1 0 ...
## $ V2 : chr "?" "?" "?" "164" ...
## $ V3 : chr "alfa-romero" "alfa-romero" "alfa-romero" "audi" ...
## $ V4 : chr "gas" "gas" "gas" "gas" ...
## $ V5 : chr "std" "std" "std" "std" ...
## $ V6 : chr "two" "two" "two" "four" ...
## $ V7 : chr "convertible" "convertible" "hatchback" "sedan" ...
## $ V8 : chr "rwd" "rwd" "rwd" "fwd" ...
## $ V9 : chr "front" "front" "front" "front" ...
## $ V10: num 88.6 88.6 94.5 99.8 99.4 ...
## $ V11: num 169 169 171 177 177 ...
## $ V12: num 64.1 64.1 65.5 66.2 66.4 66.3 71.4 71.4 71.4 67.9 ...
## $ V13: num 48.8 48.8 52.4 54.3 54.3 53.1 55.7 55.7 55.9 52 ...
## $ V14: int 2548 2548 2823 2337 2824 2507 2844 2954 3086 3053 ...
## $ V15: chr "dohc" "dohc" "ohcv" "ohc" ...
## $ V16: chr "four" "four" "six" "four" ...
## $ V17: int 130 130 152 109 136 136 136 136 131 131 ...
## $ V18: chr "mpfi" "mpfi" "mpfi" "mpfi" ...
## $ V19: chr "3.47" "3.47" "2.68" "3.19" ...
## $ V20: chr "2.68" "2.68" "3.47" "3.40" ...
## $ V21: num 9 9 9 10 8 8.5 8.5 8.5 8.3 7 ...
## $ V22: chr "111" "111" "154" "102" ...
## $ V23: chr "5000" "5000" "5000" "5500" ...
## $ V24: int 21 21 19 24 18 19 19 19 17 16 ...
## $ V25: int 27 27 26 30 22 25 25 25 20 22 ...
## $ V26: chr "13495" "16500" "16500" "13950" ...
## - attr(*, ".internal.selfref")=<externalptr>
# Column names
names(data)
## [1] "V1" "V2" "V3" "V4" "V5" "V6" "V7" "V8" "V9" "V10" "V11" "V12"
## [13] "V13" "V14" "V15" "V16" "V17" "V18" "V19" "V20" "V21" "V22" "V23" "V24"
## [25] "V25" "V26"
# Dimension of data
dim(data)
## [1] 205 26
#
# Data Preprocessing
#
# Coerce the columns that hold real numbers to the numeric (double) type.
# Entries that cannot be parsed as a number become NA and raise a coercion
# warning for the affected column.
numeric_columns = c("V1", "V2", "V14", "V17", "V19", "V20",
                    "V21", "V22", "V23", "V24", "V25", "V26")
data[, (numeric_columns) := lapply(.SD, as.numeric), .SDcols = numeric_columns]
## Warning: NAs introduced by coercion
## Warning: NAs introduced by coercion
## Warning: NAs introduced by coercion
## Warning: NAs introduced by coercion
## Warning: NAs introduced by coercion
## Warning: NAs introduced by coercion
# Checking for null values in every column: count the NA entries per column.
# Only real NA values are counted; a "?" left in a character column is an
# ordinary string for is.na() and does not show up here.
colSums(is.na(data))
## V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20
## 0 41 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 4 4
## V21 V22 V23 V24 V25 V26
## 0 2 2 0 0 4
# Removing the rows in which the target column, i.e. V26, is NA
# (rows are dropped rather than imputed; 205 rows become 201 in the output)
data = data[!is.na(data$V26),]
# Replacing the remaining missing values: the column mean for numeric columns
# and the most frequent value (mode) for character columns.

# Flag the missing entries of a character vector.
#   X : character vector.
# Returns a logical vector of the same length that is TRUE where the entry is
# the "?" marker used by the data file or a real NA. It never returns NA, so
# the result is safe to use as a replacement mask.
isNACharacter = function(X) {
  is.na(X) | X == "?"
}

# Impute the missing entries of one column.
#   X : a single column (atomic vector).
# Character columns: entries flagged by isNACharacter() are replaced with the
#   mode of the non-missing entries. Ties go to the value that sorts first,
#   because table() orders its levels and which.max() takes the first maximum.
# Other columns: NA entries are replaced with the mean of the non-NA entries.
# A column with nothing missing, or with nothing but missing entries, is
# returned unchanged because there is nothing to impute or to impute from.
replaceNaWithMeanOrMode = function(X) {
  if (is.character(X)) {
    missing_entries = isNACharacter(X)
    if (!any(missing_entries) || all(missing_entries)) {
      return(X)
    }
    # The mode is taken over the observed values only, so the "?" marker can
    # never be chosen as its own replacement.
    level_counts = table(X[!missing_entries])
    X_mode = names(level_counts)[which.max(level_counts)]
    return(replace(X, missing_entries, X_mode))
  }
  missing_entries = is.na(X)
  if (!any(missing_entries) || all(missing_entries)) {
    return(X)
  }
  replace(X, missing_entries, mean(X, na.rm = TRUE))
}
# Impute every column: .SD stands for all columns of data, and lapply runs
# replaceNaWithMeanOrMode on each of them in turn.
# The check below counts real NA values only; it says nothing about "?"
# markers remaining in character columns.
data = data[, lapply(.SD, replaceNaWithMeanOrMode)]
colSums(is.na(data)) # Checking if all null values have been removed
## V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20
## 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## V21 V22 V23 V24 V25 V26
## 0 0 0 0 0 0
# Distribution of categorical data
# For every character column: print the frequency table of its values, print
# the same table as row percentages, and draw a pie chart of the counts.
for(name in names(data)) {
  X = data[[name]]
  # is.character (not !is.double) so that an integer column can never be
  # mistaken for a categorical one; this is the exact complement of the
  # !is.character test used by the numeric summary loop.
  if(is.character(X)) {
    distribution = xtabs(formula = as.formula(paste("~", name)), data=data)
    rowPercentage = rowPerc(distribution)
    print(distribution)
    print(rowPercentage)
    pie(distribution, main=name, radius=1)
  }
}
## V3
## alfa-romero audi bmw chevrolet dodge
## 3 6 8 3 9
## honda isuzu jaguar mazda mercedes-benz
## 13 2 3 17 8
## mercury mitsubishi nissan peugot plymouth
## 1 13 18 11 7
## porsche renault saab subaru toyota
## 4 2 6 12 32
## volkswagen volvo
## 12 11
##
## V3 alfa-romero audi bmw chevrolet dodge honda isuzu jaguar mazda mercedes-benz
## 1.49 2.99 3.98 1.49 4.48 6.47 1 1.49 8.46 3.98
##
## V3 mercury mitsubishi nissan peugot plymouth porsche renault saab subaru toyota
## 0.5 6.47 8.96 5.47 3.48 1.99 1 2.99 5.97 15.92
##
## V3 volkswagen volvo Total
## 5.97 5.47 100

## V4
## diesel gas
## 20 181
##
## V4 diesel gas Total
## 9.95 90.05 100

## V5
## std turbo
## 165 36
##
## V5 std turbo Total
## 82.09 17.91 100

## V6
## four two
## 115 86
##
## V6 four two Total
## 57.21 42.79 100

## V7
## convertible hardtop hatchback sedan wagon
## 6 8 68 94 25
##
## V7 convertible hardtop hatchback sedan wagon Total
## 2.99 3.98 33.83 46.77 12.44 100

## V8
## 4wd fwd rwd
## 8 118 75
##
## V8 4wd fwd rwd Total
## 3.98 58.71 37.31 100

## V9
## front rear
## 198 3
##
## V9 front rear Total
## 98.51 1.49 100

## V15
## dohc l ohc ohcf ohcv rotor
## 12 12 145 15 13 4
##
## V15 dohc l ohc ohcf ohcv rotor Total
## 5.97 5.97 72.14 7.46 6.47 1.99 100

## V16
## eight five four six three twelve two
## 4 10 157 24 1 1 4
##
## V16 eight five four six three twelve two Total
## 1.99 4.98 78.11 11.94 0.5 0.5 1.99 100

## V18
## 1bbl 2bbl 4bbl idi mfi mpfi spdi spfi
## 11 64 3 20 1 92 9 1
##
## V18 1bbl 2bbl 4bbl idi mfi mpfi spdi spfi Total
## 5.47 31.84 1.49 9.95 0.5 45.77 4.48 0.5 100

# Histogram of numerical column and summary
# Work out once which columns are text, then print the name and the summary of
# every remaining column and draw its histogram.
is_text = vapply(data, is.character, logical(1))
for (name in names(data)[!is_text]) {
  X = data[[name]]  # kept as X: hist() labels the x axis from this expression
  print(name)
  print(summary(X))
  hist(X, main = name)
}
## [1] "V1"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -2.0000 0.0000 1.0000 0.8408 2.0000 3.0000

## [1] "V2"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 65 101 122 122 137 256

## [1] "V10"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 86.6 94.5 97.0 98.8 102.4 120.9

## [1] "V11"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 141.1 166.8 173.2 174.2 183.5 208.1

## [1] "V12"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 60.30 64.10 65.50 65.89 66.60 72.00

## [1] "V13"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 47.80 52.00 54.10 53.77 55.50 59.80

## [1] "V14"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1488 2169 2414 2556 2926 4066

## [1] "V17"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 61.0 98.0 120.0 126.9 141.0 326.0

## [1] "V19"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2.540 3.150 3.310 3.331 3.580 3.940

## [1] "V20"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2.070 3.110 3.290 3.257 3.410 4.170

## [1] "V21"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 7.00 8.60 9.00 10.16 9.40 23.00

## [1] "V22"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 48.0 70.0 95.0 103.4 116.0 262.0

## [1] "V23"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 4150 4800 5118 5118 5500 6600

## [1] "V24"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 13.00 19.00 24.00 25.18 30.00 49.00

## [1] "V25"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 16.00 25.00 30.00 30.69 34.00 54.00

## [1] "V26"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 5118 7775 10295 13207 16500 45400

# Advanced Exploration of the dataset
# Cross-tabulate V3 against V9: the number of rows for each combination of
# their values.
xtabs(~V3+V9, data=data)
## V9
## V3 front rear
## alfa-romero 3 0
## audi 6 0
## bmw 8 0
## chevrolet 3 0
## dodge 9 0
## honda 13 0
## isuzu 2 0
## jaguar 3 0
## mazda 17 0
## mercedes-benz 8 0
## mercury 1 0
## mitsubishi 13 0
## nissan 18 0
## peugot 11 0
## plymouth 7 0
## porsche 1 3
## renault 2 0
## saab 6 0
## subaru 12 0
## toyota 32 0
## volkswagen 12 0
## volvo 11 0
# Scatter plot of the target V26 against V1
plot(data$V1, data$V26)

# Scatter plot of the target V26 against V2
plot(data$V2, data$V26)

# Box plot of V2 (spread and outliers of a single column)
boxplot(data$V2)

# Quantile-quantile plot comparing the distribution of V1 with that of V26
qqplot(data$V1, data$V26)

# Kernel density estimate of the target V26
ggplot(data) +
aes(x = V26) +
geom_density()

# Correlation Plot
# Print the pairwise correlation matrix of the numeric columns. The trailing
# [,] has both indices empty, so it keeps every row and column.
cor(select_if(data, is.numeric)[,])
## V1 V2 V10 V11 V12 V13
## V1 1.000000000 0.46626376 -0.53598680 -0.36540436 -0.24242260 -0.55015986
## V2 0.466263758 1.00000000 -0.05666124 0.01942356 0.08680206 -0.37373695
## V10 -0.535986803 -0.05666124 1.00000000 0.87602389 0.81450665 0.59074167
## V11 -0.365404363 0.01942356 0.87602389 1.00000000 0.85717032 0.49206255
## V12 -0.242422604 0.08680206 0.81450665 0.85717032 1.00000000 0.30600216
## V13 -0.550159864 -0.37373695 0.59074167 0.49206255 0.30600216 1.00000000
## V14 -0.233118485 0.09940425 0.78209724 0.88066479 0.86620110 0.30758082
## V17 -0.110580556 0.11236002 0.57202669 0.68502476 0.72943564 0.07469409
## V19 -0.139896218 -0.02979985 0.49320299 0.60894083 0.54487909 0.18032692
## V20 -0.007991611 0.05512732 0.15796369 0.12391279 0.18881359 -0.06082202
## V21 -0.182196158 -0.11471325 0.25031309 0.15973311 0.18986712 0.25973714
## V22 0.075789909 0.21730000 0.37124988 0.57973062 0.61500603 -0.08694068
## V23 0.279719376 0.23954380 -0.36023264 -0.28603534 -0.24585165 -0.30991346
## V24 -0.035527043 -0.22501573 -0.47060641 -0.66519239 -0.63353064 -0.04979997
## V25 0.036232811 -0.18187718 -0.54330447 -0.69814185 -0.68063521 -0.10481184
## V26 -0.082391187 0.13399873 0.58464182 0.69062838 0.75126534 0.13548631
## V14 V17 V19 V20 V21 V22
## V1 -0.23311849 -0.11058056 -0.139896218 -0.007991611 -0.182196158 0.07578991
## V2 0.09940425 0.11236002 -0.029799848 0.055127318 -0.114713246 0.21730000
## V10 0.78209724 0.57202669 0.493202986 0.157963690 0.250313088 0.37124988
## V11 0.88066479 0.68502476 0.608940834 0.123912787 0.159733109 0.57973062
## V12 0.86620110 0.72943564 0.544879092 0.188813592 0.189867118 0.61500603
## V13 0.30758082 0.07469409 0.180326923 -0.060822019 0.259737141 -0.08694068
## V14 1.00000000 0.84907166 0.644040577 0.167411865 0.156432613 0.75799367
## V17 0.84907166 1.00000000 0.572515716 0.205805674 0.028888633 0.82264944
## V19 0.64404058 0.57251572 1.000000000 -0.055390011 0.001249645 0.56683786
## V20 0.16741187 0.20580567 -0.055390011 1.000000000 0.187853516 0.09781468
## V21 0.15643261 0.02888863 0.001249645 0.187853516 1.000000000 -0.21443063
## V22 0.75799367 0.82264944 0.566837859 0.097814677 -0.214430629 1.00000000
## V23 -0.27934961 -0.25675339 -0.267338383 -0.063719567 -0.435720829 0.10788157
## V24 -0.74954309 -0.65054598 -0.582121055 -0.034078944 0.331424839 -0.82213847
## V25 -0.79488894 -0.67957126 -0.591390045 -0.034741423 0.268464848 -0.80458746
## V26 0.83441453 0.87233517 0.543153766 0.082267100 0.071107327 0.80968120
## V23 V24 V25 V26
## V1 0.27971938 -0.03552704 0.03623281 -0.08239119
## V2 0.23954380 -0.22501573 -0.18187718 0.13399873
## V10 -0.36023264 -0.47060641 -0.54330447 0.58464182
## V11 -0.28603534 -0.66519239 -0.69814185 0.69062838
## V12 -0.24585165 -0.63353064 -0.68063521 0.75126534
## V13 -0.30991346 -0.04979997 -0.10481184 0.13548631
## V14 -0.27934961 -0.74954309 -0.79488894 0.83441453
## V17 -0.25675339 -0.65054598 -0.67957126 0.87233517
## V19 -0.26733838 -0.58212106 -0.59139004 0.54315377
## V20 -0.06371957 -0.03407894 -0.03474142 0.08226710
## V21 -0.43572083 0.33142484 0.26846485 0.07110733
## V22 0.10788157 -0.82213847 -0.80458746 0.80968120
## V23 1.00000000 -0.11535804 -0.05860516 -0.10154203
## V24 -0.11535804 1.00000000 0.97204371 -0.68657101
## V25 -0.05860516 0.97204371 1.00000000 -0.70469227
## V26 -0.10154203 -0.68657101 -0.70469227 1.00000000
# Build the correlation matrix of the numeric columns and draw it with the
# coefficient printed in every cell.
numeric_cols = names(data)[vapply(data, is.numeric, logical(1))]
M = cor(data[, numeric_cols, with = FALSE])
corrplot(M, method = "number")

# Summary statistics of V1: min, quartiles, max, mean, sd, n and missing count
favstats(~V1, data=data)
## min Q1 median Q3 max mean sd n missing
## -2 0 1 2 3 0.840796 1.254802 201 0
# Density of V2, one curve per value of V6, with an automatic legend
densityplot(~V2,data=data,
groups=V6,
auto.key=TRUE)

summary(data)
## V1 V2 V3 V4
## Min. :-2.0000 Min. : 65 Length:201 Length:201
## 1st Qu.: 0.0000 1st Qu.:101 Class :character Class :character
## Median : 1.0000 Median :122 Mode :character Mode :character
## Mean : 0.8408 Mean :122
## 3rd Qu.: 2.0000 3rd Qu.:137
## Max. : 3.0000 Max. :256
## V5 V6 V7 V8
## Length:201 Length:201 Length:201 Length:201
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## V9 V10 V11 V12
## Length:201 Min. : 86.6 Min. :141.1 Min. :60.30
## Class :character 1st Qu.: 94.5 1st Qu.:166.8 1st Qu.:64.10
## Mode :character Median : 97.0 Median :173.2 Median :65.50
## Mean : 98.8 Mean :174.2 Mean :65.89
## 3rd Qu.:102.4 3rd Qu.:183.5 3rd Qu.:66.60
## Max. :120.9 Max. :208.1 Max. :72.00
## V13 V14 V15 V16
## Min. :47.80 Min. :1488 Length:201 Length:201
## 1st Qu.:52.00 1st Qu.:2169 Class :character Class :character
## Median :54.10 Median :2414 Mode :character Mode :character
## Mean :53.77 Mean :2556
## 3rd Qu.:55.50 3rd Qu.:2926
## Max. :59.80 Max. :4066
## V17 V18 V19 V20
## Min. : 61.0 Length:201 Min. :2.540 Min. :2.070
## 1st Qu.: 98.0 Class :character 1st Qu.:3.150 1st Qu.:3.110
## Median :120.0 Mode :character Median :3.310 Median :3.290
## Mean :126.9 Mean :3.331 Mean :3.257
## 3rd Qu.:141.0 3rd Qu.:3.580 3rd Qu.:3.410
## Max. :326.0 Max. :3.940 Max. :4.170
## V21 V22 V23 V24 V25
## Min. : 7.00 Min. : 48.0 Min. :4150 Min. :13.00 Min. :16.00
## 1st Qu.: 8.60 1st Qu.: 70.0 1st Qu.:4800 1st Qu.:19.00 1st Qu.:25.00
## Median : 9.00 Median : 95.0 Median :5118 Median :24.00 Median :30.00
## Mean :10.16 Mean :103.4 Mean :5118 Mean :25.18 Mean :30.69
## 3rd Qu.: 9.40 3rd Qu.:116.0 3rd Qu.:5500 3rd Qu.:30.00 3rd Qu.:34.00
## Max. :23.00 Max. :262.0 Max. :6600 Max. :49.00 Max. :54.00
## V26
## Min. : 5118
## 1st Qu.: 7775
## Median :10295
## Mean :13207
## 3rd Qu.:16500
## Max. :45400
# One Hot Encoding
# Expand the character columns into one indicator column per value and replace
# `data` with the encoded table.
# NOTE(review): every value of each categorical column gets its own indicator,
# so the indicators of one column always sum to 1. This is presumably why lm()
# later reports coefficients "not defined because of singularities". Passing
# fullRank = TRUE to dummyVars would avoid it, but it changes the number of
# columns, so code that indexes columns by position must be updated with it.
encoder <- dummyVars(" ~ .", data = data)
data = data.table(predict(encoder, newdata = data))
# Normalise the data
# Min-max rescale a numeric vector to the range [0, 1].
#   x : numeric vector.
# Returns a double vector of the same length: (x - min) / (max - min).
# NA entries are ignored when the minimum and maximum are computed and stay NA
# in the result. A constant vector (max == min) maps to all zeros instead of
# the NaN that 0 / 0 would produce. A vector with no observed value is
# returned unchanged.
normalize <- function(x) {
  if (all(is.na(x))) {
    return(x)
  }
  lowest <- min(x, na.rm = TRUE)
  span <- max(x, na.rm = TRUE) - lowest
  # Dividing by 1 when the span is zero keeps the result a double of zeros.
  divisor <- if (span == 0) 1 else span
  (x - lowest) / divisor
}
# Rescale every column of data (.SD is all columns), which includes the target
# V26 as well as the 0/1 indicator columns.
data = data[, lapply(.SD, normalize)]
# Anova Test
# Fit V26 against every other column and print the ANOVA table with one row
# per term.
# NOTE(review): despite its name, `residuals` holds the fitted aov model, not
# the residuals of the fit.
residuals<-aov(V26~.,data=data)
summary.aov(residuals)
## Df Sum Sq Mean Sq F value Pr(>F)
## V1 1 0.0528 0.0528 29.101 2.85e-07 ***
## V2 1 0.2957 0.2957 162.837 < 2e-16 ***
## `V3alfa-romero` 1 0.0315 0.0315 17.356 5.39e-05 ***
## V3audi 1 0.0648 0.0648 35.712 1.80e-08 ***
## V3bmw 1 0.6469 0.6469 356.269 < 2e-16 ***
## V3chevrolet 1 0.0625 0.0625 34.412 3.07e-08 ***
## V3dodge 1 0.1406 0.1406 77.407 4.64e-15 ***
## V3honda 1 0.1635 0.1635 90.050 < 2e-16 ***
## V3isuzu 1 0.0216 0.0216 11.905 0.00074 ***
## V3jaguar 1 0.8056 0.8056 443.661 < 2e-16 ***
## V3mazda 1 0.0512 0.0512 28.183 4.23e-07 ***
## `V3mercedes-benz` 1 2.2052 2.2052 1214.425 < 2e-16 ***
## V3mercury 1 0.0135 0.0135 7.431 0.00723 **
## V3mitsubishi 1 0.0765 0.0765 42.137 1.37e-09 ***
## V3nissan 1 0.0532 0.0532 29.291 2.63e-07 ***
## V3peugot 1 0.0582 0.0582 32.059 8.17e-08 ***
## V3plymouth 1 0.0868 0.0868 47.786 1.55e-10 ***
## V3porsche 1 0.9076 0.9076 499.805 < 2e-16 ***
## V3renault 1 0.0041 0.0041 2.240 0.13677
## V3saab 1 0.0807 0.0807 44.459 5.55e-10 ***
## V3subaru 1 0.0457 0.0457 25.171 1.57e-06 ***
## V3toyota 1 0.1253 0.1253 68.990 7.64e-14 ***
## V3volkswagen 1 0.2190 0.2190 120.630 < 2e-16 ***
## V4diesel 1 0.0059 0.0059 3.272 0.07262 .
## V5std 1 0.1102 0.1102 60.714 1.35e-12 ***
## V6four 1 0.0074 0.0074 4.060 0.04582 *
## V7convertible 1 0.0321 0.0321 17.671 4.66e-05 ***
## V7hardtop 1 0.0141 0.0141 7.746 0.00613 **
## V7hatchback 1 0.0000 0.0000 0.007 0.93487
## V7sedan 1 0.0000 0.0000 0.005 0.94117
## V84wd 1 0.0004 0.0004 0.204 0.65216
## V8fwd 1 0.3002 0.3002 165.303 < 2e-16 ***
## V9front 1 0.0435 0.0435 23.954 2.68e-06 ***
## V10 1 0.2407 0.2407 132.550 < 2e-16 ***
## V11 1 0.1110 0.1110 61.149 1.16e-12 ***
## V12 1 0.1215 0.1215 66.884 1.57e-13 ***
## V13 1 0.0046 0.0046 2.537 0.11345
## V14 1 0.1314 0.1314 72.343 2.47e-14 ***
## V15dohc 1 0.0018 0.0018 1.005 0.31779
## V15l 1 0.0097 0.0097 5.333 0.02239 *
## V15ohc 1 0.0124 0.0124 6.837 0.00991 **
## V15ohcv 1 0.0028 0.0028 1.535 0.21743
## V16eight 1 0.0670 0.0670 36.874 1.12e-08 ***
## V16five 1 0.0010 0.0010 0.553 0.45836
## V16four 1 0.0072 0.0072 3.992 0.04766 *
## V16six 1 0.0047 0.0047 2.565 0.11151
## V17 1 0.0139 0.0139 7.633 0.00650 **
## V181bbl 1 0.0020 0.0020 1.099 0.29634
## V182bbl 1 0.0062 0.0062 3.423 0.06641 .
## V184bbl 1 0.0026 0.0026 1.417 0.23586
## V18mfi 1 0.0007 0.0007 0.365 0.54683
## V18mpfi 1 0.0004 0.0004 0.223 0.63712
## V18spdi 1 0.0003 0.0003 0.154 0.69525
## V19 1 0.0194 0.0194 10.669 0.00137 **
## V20 1 0.0035 0.0035 1.933 0.16664
## V21 1 0.0068 0.0068 3.765 0.05435 .
## V22 1 0.0054 0.0054 2.989 0.08604 .
## V23 1 0.0200 0.0200 11.011 0.00115 **
## V24 1 0.0040 0.0040 2.222 0.13830
## V25 1 0.0029 0.0029 1.585 0.21011
## Residuals 140 0.2542 0.0018
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Checking if linear regression is linear in parameters
# check.names = TRUE rewrites the one-hot column names into syntactically valid
# R names (e.g. `V3alfa-romero` becomes V3alfa.romero) so that they can be
# pasted into a formula.
data = data.table(data, check.names = TRUE)
# Every column except the response V26 is a predictor. Selecting by name keeps
# this correct when the number of encoded columns changes, unlike a fixed
# positional slice.
column_names = setdiff(names(data), "V26")
input_form = paste(column_names, collapse="+")
formula = as.formula(paste("V26 ~ ", input_form))
# Ordinary least squares fit of V26 on all predictors, followed by the
# coefficient table and the overall fit statistics.
mod <- lm(formula=formula, data=data)
summary(mod)
##
## Call:
## lm(formula = formula, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.08730 -0.02422 0.00000 0.02095 0.18874
##
## Coefficients: (14 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.203078 0.179570 1.131 0.260026
## V1 -0.054994 0.032079 -1.714 0.088688 .
## V2 -0.052359 0.035359 -1.481 0.140910
## V3alfa.romero 0.056041 0.054156 1.035 0.302542
## V3audi 0.132129 0.053502 2.470 0.014728 *
## V3bmw 0.217117 0.031663 6.857 2.07e-10 ***
## V3chevrolet -0.063346 0.044575 -1.421 0.157508
## V3dodge -0.068660 0.039137 -1.754 0.081562 .
## V3honda 0.001758 0.048356 0.036 0.971053
## V3isuzu -0.035764 0.056289 -0.635 0.526229
## V3jaguar 0.028849 0.070382 0.410 0.682508
## V3mazda 0.013737 0.031818 0.432 0.666604
## V3mercedes.benz 0.115136 0.048812 2.359 0.019719 *
## V3mercury -0.029460 0.051993 -0.567 0.571893
## V3mitsubishi -0.068498 0.041857 -1.636 0.103981
## V3nissan 0.005396 0.033856 0.159 0.873608
## V3peugot -0.279359 0.143865 -1.942 0.054168 .
## V3plymouth -0.067038 0.038784 -1.729 0.086102 .
## V3porsche 0.163841 0.056684 2.890 0.004461 **
## V3renault -0.015575 0.051520 -0.302 0.762861
## V3saab 0.126857 0.045883 2.765 0.006463 **
## V3subaru -0.222542 0.145812 -1.526 0.129209
## V3toyota -0.022110 0.028744 -0.769 0.443061
## V3volkswagen 0.036391 0.038916 0.935 0.351340
## V3volvo NA NA NA NA
## V4diesel 0.253966 0.164502 1.544 0.124881
## V4gas NA NA NA NA
## V5std -0.048322 0.019959 -2.421 0.016757 *
## V5turbo NA NA NA NA
## V6four -0.002805 0.012454 -0.225 0.822150
## V6two NA NA NA NA
## V7convertible 0.070444 0.031687 2.223 0.027811 *
## V7hardtop 0.015039 0.027328 0.550 0.582980
## V7hatchback -0.005502 0.018765 -0.293 0.769812
## V7sedan 0.006132 0.013330 0.460 0.646213
## V7wagon NA NA NA NA
## V84wd -0.006847 0.030664 -0.223 0.823647
## V8fwd -0.027258 0.021532 -1.266 0.207639
## V8rwd NA NA NA NA
## V9front -0.003314 0.157549 -0.021 0.983248
## V9rear NA NA NA NA
## V10 0.203450 0.077771 2.616 0.009872 **
## V11 -0.224214 0.082956 -2.703 0.007727 **
## V12 0.176648 0.065320 2.704 0.007693 **
## V13 -0.120779 0.043309 -2.789 0.006027 **
## V14 0.407057 0.106033 3.839 0.000186 ***
## V15dohc -0.232372 0.141128 -1.647 0.101898
## V15l -0.001444 0.083099 -0.017 0.986158
## V15ohc -0.214642 0.141073 -1.521 0.130391
## V15ohcf NA NA NA NA
## V15ohcv -0.295973 0.136265 -2.172 0.031536 *
## V15rotor NA NA NA NA
## V16eight 0.141430 0.126239 1.120 0.264489
## V16five -0.011064 0.134347 -0.082 0.934480
## V16four 0.065186 0.129956 0.502 0.616735
## V16six 0.050827 0.109826 0.463 0.644231
## V16three NA NA NA NA
## V16twelve NA NA NA NA
## V16two NA NA NA NA
## V17 0.609293 0.165053 3.691 0.000319 ***
## V181bbl -0.055236 0.075027 -0.736 0.462830
## V182bbl 0.007913 0.064641 0.122 0.902750
## V184bbl -0.066441 0.083622 -0.795 0.428227
## V18idi NA NA NA NA
## V18mfi -0.046368 0.080843 -0.574 0.567186
## V18mpfi -0.024123 0.065853 -0.366 0.714678
## V18spdi -0.050339 0.069622 -0.723 0.470866
## V18spfi NA NA NA NA
## V19 -0.123262 0.063182 -1.951 0.053067 .
## V20 -0.066424 0.050791 -1.308 0.193088
## V21 -0.334403 0.192178 -1.740 0.084045 .
## V22 0.012711 0.129492 0.098 0.921944
## V23 0.144270 0.038961 3.703 0.000306 ***
## V24 -0.031465 0.118173 -0.266 0.790427
## V25 0.134543 0.106861 1.259 0.210109
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.04261 on 140 degrees of freedom
## Multiple R-squared: 0.9673, Adjusted R-squared: 0.9533
## F-statistic: 69.11 on 60 and 140 DF, p-value: < 2.2e-16
# The mean of residuals is zero
mean(mod$residuals)
## [1] -1.877564e-18
# Homoscedasticity of residuals or equal variance
# Draw the lm diagnostic plots on one 2x2 page. par() returns the previous
# settings, which are restored afterwards so that later plots are not drawn
# into the same 2x2 grid.
old_par = par(mfrow=c(2,2))
plot(mod)
par(old_par)
## Warning: not plotting observations with leverage one:
## 18, 29, 43, 44, 47, 56, 73, 123

# No autocorrelation of residuals
acf(mod$residuals)
# The X variables and residuals are uncorrelated
# Run a Pearson correlation test between one column and the residuals of the
# fitted model `mod` (read from the enclosing environment) and print the result.
#   X     : numeric vector with one value per residual of `mod`.
#   label : optional name printed above the test, so that the output can be
#           matched to a column; nothing extra is printed when it is NULL.
checkCorrelationWithMod = function(X, label = NULL) {
  if (!is.null(label)) {
    cat("\nVariable:", label, "\n")
  }
  print(cor.test(X, mod$residuals))
}
# Test the predictors only: V26 is the response, not an X variable.
# invisible() stops the list returned by lapply from being printed at top
# level, which would repeat every test a second time.
predictor_names = setdiff(names(data), "V26")
invisible(lapply(predictor_names, function(name) {
  checkCorrelationWithMod(data[[name]], label = name)
}))
##
## Pearson's product-moment correlation
##
## data: x and y
## t = -3.5143e-15, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## -2.491239e-16
##
##
## Pearson's product-moment correlation
##
## data: x and y
## t = -3.2677e-15, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## -2.316386e-16
##
##
## Pearson's product-moment correlation
##
## data: x and y
## t = -2.4493e-15, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## -1.736269e-16
##
##
## Pearson's product-moment correlation
##
## data: x and y
## t = 2.4121e-16, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## 1.709866e-17
##
##
## Pearson's product-moment correlation
##
## data: x and y
## t = 6.0688e-16, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## 4.302049e-17
##
##
## Pearson's product-moment correlation
##
## data: x and y
## t = -5.6476e-16, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## -4.003508e-17
##
##
## Pearson's product-moment correlation
##
## data: x and y
## t = 1.6948e-16, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## 1.201423e-17
##
##
## Pearson's product-moment correlation
##
## data: x and y
## t = -5.1927e-16, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## -3.68104e-17
##
##
## Pearson's product-moment correlation
##
## data: x and y
## t = -1.0808e-15, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## -7.661577e-17
##
##
## Pearson's product-moment correlation
##
## data: x and y
## t = 5.7165e-16, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## 4.052285e-17
##
##
## Pearson's product-moment correlation
##
## data: x and y
## t = 2.7862e-16, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## 1.975052e-17
##
##
## Pearson's product-moment correlation
##
## data: x and y
## t = 1.4125e-15, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## 1.001308e-16
##
##
## Pearson's product-moment correlation
##
## data: x and y
## t = -5.4337e-16, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## -3.851882e-17
##
##
## Pearson's product-moment correlation
##
## data: x and y
## t = 7.0574e-17, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## 5.002875e-18
##
##
## Pearson's product-moment correlation
##
## data: x and y
## t = 4.3339e-15, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## 3.072213e-16
##
##
## Pearson's product-moment correlation
##
## data: x and y
## t = 1.007e-15, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## 7.138509e-17
##
##
## Pearson's product-moment correlation
##
## data: x and y
## t = -8.0149e-16, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## -5.681607e-17
##
##
## Pearson's product-moment correlation
##
## data: x and y
## t = 3.0223e-16, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## 2.142439e-17
##
##
## Pearson's product-moment correlation
##
## data: x and y
## t = -2.019e-17, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## -1.431215e-18
##
##
## Pearson's product-moment correlation
##
## data: x and y
## t = 1.4582e-15, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## 1.033721e-16
##
##
## Pearson's product-moment correlation
##
## data: x and y
## t = -2.6227e-15, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## -1.859167e-16
##
##
## Pearson's product-moment correlation
##
## data: x and y
## t = 1.0299e-16, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## 7.301103e-18
##
##
## Pearson's product-moment correlation
##
## data: x and y
## t = -8.6708e-16, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## -6.146553e-17
##
##
## Pearson's product-moment correlation
##
## data: x and y
## t = -3.5837e-15, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## -2.54045e-16
##
##
## Pearson's product-moment correlation
##
## data: x and y
## t = -6.6602e-16, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## -4.721295e-17
##
##
## Pearson's product-moment correlation
##
## data: x and y
## t = 6.6591e-16, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## 4.720483e-17
##
##
## Pearson's product-moment correlation
##
## data: x and y
## t = -8.5015e-15, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## -6.026523e-16
##
##
## Pearson's product-moment correlation
##
## data: x and y
## t = 8.5013e-15, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## 6.026411e-16
##
##
## Pearson's product-moment correlation
##
## data: x and y
## t = 1.4319e-16, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## 1.015077e-17
##
##
## Pearson's product-moment correlation
##
## data: x and y
## t = -1.4303e-16, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## -1.013927e-17
##
##
## Pearson's product-moment correlation
##
## data: x and y
## t = -2.042e-15, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## -1.447515e-16
##
##
## Pearson's product-moment correlation
##
## data: x and y
## t = 7.3823e-16, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## 5.233198e-17
##
##
## Pearson's product-moment correlation
##
## data: x and y
## t = 4.4482e-16, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## 3.153271e-17
##
##
## Pearson's product-moment correlation
##
## data: x and y
## t = 1.6285e-15, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## 1.154413e-16
##
##
## Pearson's product-moment correlation
##
## data: x and y
## t = -2.4839e-15, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## -1.76081e-16
##
##
## Pearson's product-moment correlation
##
## data: x and y
## t = -1.8272e-15, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## -1.295282e-16
##
##
## Pearson's product-moment correlation
##
## data: x and y
## t = 3.4439e-17, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## 2.441318e-18
##
##
## Pearson's product-moment correlation
##
## data: x and y
## t = 7.0376e-16, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## 4.988841e-17
##
##
## Pearson's product-moment correlation
##
## data: x and y
## t = -4.6563e-14, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## -3.300788e-15
##
##
## Pearson's product-moment correlation
##
## data: x and y
## t = 4.6563e-14, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## 3.300786e-15
##
##
## Pearson's product-moment correlation
##
## data: x and y
## t = -7.3493e-16, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## -5.209776e-17
##
##
## Pearson's product-moment correlation
##
## data: x and y
## t = 2.7853e-15, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## 1.974439e-16
##
##
## Pearson's product-moment correlation
##
## data: x and y
## t = 1.7289e-15, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## 1.225565e-16
##
##
## Pearson's product-moment correlation
##
## data: x and y
## t = -2.4989e-15, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## -1.771454e-16
##
##
## Pearson's product-moment correlation
##
## data: x and y
## t = 3.5438e-15, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## 2.512108e-16
##
##
## Pearson's product-moment correlation
##
## data: x and y
## t = 9.3179e-16, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## 6.605288e-17
##
##
## Pearson's product-moment correlation
##
## data: x and y
## t = -3.2909e-15, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## -2.332835e-16
##
##
## Pearson's product-moment correlation
##
## data: x and y
## t = -2.0589e-15, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## -1.459495e-16
##
##
## Pearson's product-moment correlation
##
## data: x and y
## t = 1.912e-14, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## 1.355407e-15
##
##
## Pearson's product-moment correlation
##
## data: x and y
## t = -1.5162e-15, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## -1.074779e-16
##
##
## Pearson's product-moment correlation
##
## data: x and y
## t = -2.2697e-14, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## -1.608928e-15
##
##
## Pearson's product-moment correlation
##
## data: x and y
## t = 1.0867e-15, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## 7.703127e-17
##
##
## Pearson's product-moment correlation
##
## data: x and y
## t = 2.5695e-16, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## 1.821505e-17
##
##
## Pearson's product-moment correlation
##
## data: x and y
## t = -1.1207e-14, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## -7.944101e-16
##
##
## Pearson's product-moment correlation
##
## data: x and y
## t = 2.866e-15, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## 2.031682e-16
##
##
## Pearson's product-moment correlation
##
## data: x and y
## t = -1.4337e-14, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## -1.016348e-15
##
##
## Pearson's product-moment correlation
##
## data: x and y
## t = 1.0909e-13, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## 7.733242e-15
##
##
## Pearson's product-moment correlation
##
## data: x and y
## t = -2.2697e-14, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## -1.608928e-15
##
##
## Pearson's product-moment correlation
##
## data: x and y
## t = -3.2536e-15, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## -2.306393e-16
##
##
## Pearson's product-moment correlation
##
## data: x and y
## t = 2.4874e-16, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## 1.763287e-17
##
##
## Pearson's product-moment correlation
##
## data: x and y
## t = -1.3992e-16, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## -9.918423e-18
##
##
## Pearson's product-moment correlation
##
## data: x and y
## t = -1.2495e-15, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## -8.857124e-17
##
##
## Pearson's product-moment correlation
##
## data: x and y
## t = -6.6602e-16, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## -4.721295e-17
##
##
## Pearson's product-moment correlation
##
## data: x and y
## t = 4.0054e-18, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## 2.839367e-19
##
##
## Pearson's product-moment correlation
##
## data: x and y
## t = 1.7767e-15, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## 1.259457e-16
##
##
## Pearson's product-moment correlation
##
## data: x and y
## t = -5.8363e-16, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## -4.137234e-17
##
##
## Pearson's product-moment correlation
##
## data: x and y
## t = -5.7617e-15, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## -4.084384e-16
##
##
## Pearson's product-moment correlation
##
## data: x and y
## t = -1.6932e-15, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## -1.200252e-16
##
##
## Pearson's product-moment correlation
##
## data: x and y
## t = -8.8474e-15, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## -6.27176e-16
##
##
## Pearson's product-moment correlation
##
## data: x and y
## t = -3.3811e-16, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## -2.396832e-17
##
##
## Pearson's product-moment correlation
##
## data: x and y
## t = 3.4534e-15, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## 2.448055e-16
##
##
## Pearson's product-moment correlation
##
## data: x and y
## t = 5.1484e-17, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## 3.64958e-18
##
##
## Pearson's product-moment correlation
##
## data: x and y
## t = 7.322e-16, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## 5.190433e-17
##
##
## Pearson's product-moment correlation
##
## data: x and y
## t = 2.4125e-15, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## 1.710192e-16
##
##
## Pearson's product-moment correlation
##
## data: x and y
## t = 2.592, df = 199, p-value = 0.01025
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.04340509 0.31132282
## sample estimates:
## cor
## 0.1807143
## $V1
##
## Pearson's product-moment correlation
##
## data: x and y
## t = -3.5143e-15, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## -2.491239e-16
##
##
## $V2
##
## Pearson's product-moment correlation
##
## data: x and y
## t = -3.2677e-15, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## -2.316386e-16
##
##
## $V3alfa.romero
##
## Pearson's product-moment correlation
##
## data: x and y
## t = -2.4493e-15, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## -1.736269e-16
##
##
## $V3audi
##
## Pearson's product-moment correlation
##
## data: x and y
## t = 2.4121e-16, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## 1.709866e-17
##
##
## $V3bmw
##
## Pearson's product-moment correlation
##
## data: x and y
## t = 6.0688e-16, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## 4.302049e-17
##
##
## $V3chevrolet
##
## Pearson's product-moment correlation
##
## data: x and y
## t = -5.6476e-16, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## -4.003508e-17
##
##
## $V3dodge
##
## Pearson's product-moment correlation
##
## data: x and y
## t = 1.6948e-16, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## 1.201423e-17
##
##
## $V3honda
##
## Pearson's product-moment correlation
##
## data: x and y
## t = -5.1927e-16, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## -3.68104e-17
##
##
## $V3isuzu
##
## Pearson's product-moment correlation
##
## data: x and y
## t = -1.0808e-15, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## -7.661577e-17
##
##
## $V3jaguar
##
## Pearson's product-moment correlation
##
## data: x and y
## t = 5.7165e-16, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## 4.052285e-17
##
##
## $V3mazda
##
## Pearson's product-moment correlation
##
## data: x and y
## t = 2.7862e-16, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## 1.975052e-17
##
##
## $V3mercedes.benz
##
## Pearson's product-moment correlation
##
## data: x and y
## t = 1.4125e-15, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## 1.001308e-16
##
##
## $V3mercury
##
## Pearson's product-moment correlation
##
## data: x and y
## t = -5.4337e-16, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## -3.851882e-17
##
##
## $V3mitsubishi
##
## Pearson's product-moment correlation
##
## data: x and y
## t = 7.0574e-17, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## 5.002875e-18
##
##
## $V3nissan
##
## Pearson's product-moment correlation
##
## data: x and y
## t = 4.3339e-15, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## 3.072213e-16
##
##
## $V3peugot
##
## Pearson's product-moment correlation
##
## data: x and y
## t = 1.007e-15, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## 7.138509e-17
##
##
## $V3plymouth
##
## Pearson's product-moment correlation
##
## data: x and y
## t = -8.0149e-16, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## -5.681607e-17
##
##
## $V3porsche
##
## Pearson's product-moment correlation
##
## data: x and y
## t = 3.0223e-16, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## 2.142439e-17
##
##
## $V3renault
##
## Pearson's product-moment correlation
##
## data: x and y
## t = -2.019e-17, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## -1.431215e-18
##
##
## $V3saab
##
## Pearson's product-moment correlation
##
## data: x and y
## t = 1.4582e-15, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## 1.033721e-16
##
##
## $V3subaru
##
## Pearson's product-moment correlation
##
## data: x and y
## t = -2.6227e-15, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## -1.859167e-16
##
##
## $V3toyota
##
## Pearson's product-moment correlation
##
## data: x and y
## t = 1.0299e-16, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## 7.301103e-18
##
##
## $V3volkswagen
##
## Pearson's product-moment correlation
##
## data: x and y
## t = -8.6708e-16, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## -6.146553e-17
##
##
## $V3volvo
##
## Pearson's product-moment correlation
##
## data: x and y
## t = -3.5837e-15, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## -2.54045e-16
##
##
## $V4diesel
##
## Pearson's product-moment correlation
##
## data: x and y
## t = -6.6602e-16, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## -4.721295e-17
##
##
## $V4gas
##
## Pearson's product-moment correlation
##
## data: x and y
## t = 6.6591e-16, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## 4.720483e-17
##
##
## $V5std
##
## Pearson's product-moment correlation
##
## data: x and y
## t = -8.5015e-15, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## -6.026523e-16
##
##
## $V5turbo
##
## Pearson's product-moment correlation
##
## data: x and y
## t = 8.5013e-15, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## 6.026411e-16
##
##
## $V6four
##
## Pearson's product-moment correlation
##
## data: x and y
## t = 1.4319e-16, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## 1.015077e-17
##
##
## $V6two
##
## Pearson's product-moment correlation
##
## data: x and y
## t = -1.4303e-16, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## -1.013927e-17
##
##
## $V7convertible
##
## Pearson's product-moment correlation
##
## data: x and y
## t = -2.042e-15, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## -1.447515e-16
##
##
## $V7hardtop
##
## Pearson's product-moment correlation
##
## data: x and y
## t = 7.3823e-16, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## 5.233198e-17
##
##
## $V7hatchback
##
## Pearson's product-moment correlation
##
## data: x and y
## t = 4.4482e-16, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## 3.153271e-17
##
##
## $V7sedan
##
## Pearson's product-moment correlation
##
## data: x and y
## t = 1.6285e-15, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## 1.154413e-16
##
##
## $V7wagon
##
## Pearson's product-moment correlation
##
## data: x and y
## t = -2.4839e-15, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## -1.76081e-16
##
##
## $V84wd
##
## Pearson's product-moment correlation
##
## data: x and y
## t = -1.8272e-15, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## -1.295282e-16
##
##
## $V8fwd
##
## Pearson's product-moment correlation
##
## data: x and y
## t = 3.4439e-17, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## 2.441318e-18
##
##
## $V8rwd
##
## Pearson's product-moment correlation
##
## data: x and y
## t = 7.0376e-16, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## 4.988841e-17
##
##
## $V9front
##
## Pearson's product-moment correlation
##
## data: x and y
## t = -4.6563e-14, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## -3.300788e-15
##
##
## $V9rear
##
## Pearson's product-moment correlation
##
## data: x and y
## t = 4.6563e-14, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## 3.300786e-15
##
##
## $V10
##
## Pearson's product-moment correlation
##
## data: x and y
## t = -7.3493e-16, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## -5.209776e-17
##
##
## $V11
##
## Pearson's product-moment correlation
##
## data: x and y
## t = 2.7853e-15, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## 1.974439e-16
##
##
## $V12
##
## Pearson's product-moment correlation
##
## data: x and y
## t = 1.7289e-15, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## 1.225565e-16
##
##
## $V13
##
## Pearson's product-moment correlation
##
## data: x and y
## t = -2.4989e-15, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## -1.771454e-16
##
##
## $V14
##
## Pearson's product-moment correlation
##
## data: x and y
## t = 3.5438e-15, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## 2.512108e-16
##
##
## $V15dohc
##
## Pearson's product-moment correlation
##
## data: x and y
## t = 9.3179e-16, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## 6.605288e-17
##
##
## $V15l
##
## Pearson's product-moment correlation
##
## data: x and y
## t = -3.2909e-15, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## -2.332835e-16
##
##
## $V15ohc
##
## Pearson's product-moment correlation
##
## data: x and y
## t = -2.0589e-15, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## -1.459495e-16
##
##
## $V15ohcf
##
## Pearson's product-moment correlation
##
## data: x and y
## t = 1.912e-14, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## 1.355407e-15
##
##
## $V15ohcv
##
## Pearson's product-moment correlation
##
## data: x and y
## t = -1.5162e-15, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## -1.074779e-16
##
##
## $V15rotor
##
## Pearson's product-moment correlation
##
## data: x and y
## t = -2.2697e-14, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## -1.608928e-15
##
##
## $V16eight
##
## Pearson's product-moment correlation
##
## data: x and y
## t = 1.0867e-15, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## 7.703127e-17
##
##
## $V16five
##
## Pearson's product-moment correlation
##
## data: x and y
## t = 2.5695e-16, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## 1.821505e-17
##
##
## $V16four
##
## Pearson's product-moment correlation
##
## data: x and y
## t = -1.1207e-14, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## -7.944101e-16
##
##
## $V16six
##
## Pearson's product-moment correlation
##
## data: x and y
## t = 2.866e-15, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## 2.031682e-16
##
##
## $V16three
##
## Pearson's product-moment correlation
##
## data: x and y
## t = -1.4337e-14, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## -1.016348e-15
##
##
## $V16twelve
##
## Pearson's product-moment correlation
##
## data: x and y
## t = 1.0909e-13, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## 7.733242e-15
##
##
## $V16two
##
## Pearson's product-moment correlation
##
## data: x and y
## t = -2.2697e-14, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## -1.608928e-15
##
##
## $V17
##
## Pearson's product-moment correlation
##
## data: x and y
## t = -3.2536e-15, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## -2.306393e-16
##
##
## $V181bbl
##
## Pearson's product-moment correlation
##
## data: x and y
## t = 2.4874e-16, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## 1.763287e-17
##
##
## $V182bbl
##
## Pearson's product-moment correlation
##
## data: x and y
## t = -1.3992e-16, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## -9.918423e-18
##
##
## $V184bbl
##
## Pearson's product-moment correlation
##
## data: x and y
## t = -1.2495e-15, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## -8.857124e-17
##
##
## $V18idi
##
## Pearson's product-moment correlation
##
## data: x and y
## t = -6.6602e-16, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## -4.721295e-17
##
##
## $V18mfi
##
## Pearson's product-moment correlation
##
## data: x and y
## t = 4.0054e-18, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## 2.839367e-19
##
##
## $V18mpfi
##
## Pearson's product-moment correlation
##
## data: x and y
## t = 1.7767e-15, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## 1.259457e-16
##
##
## $V18spdi
##
## Pearson's product-moment correlation
##
## data: x and y
## t = -5.8363e-16, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## -4.137234e-17
##
##
## $V18spfi
##
## Pearson's product-moment correlation
##
## data: x and y
## t = -5.7617e-15, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## -4.084384e-16
##
##
## $V19
##
## Pearson's product-moment correlation
##
## data: x and y
## t = -1.6932e-15, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## -1.200252e-16
##
##
## $V20
##
## Pearson's product-moment correlation
##
## data: x and y
## t = -8.8474e-15, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## -6.27176e-16
##
##
## $V21
##
## Pearson's product-moment correlation
##
## data: x and y
## t = -3.3811e-16, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## -2.396832e-17
##
##
## $V22
##
## Pearson's product-moment correlation
##
## data: x and y
## t = 3.4534e-15, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## 2.448055e-16
##
##
## $V23
##
## Pearson's product-moment correlation
##
## data: x and y
## t = 5.1484e-17, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## 3.64958e-18
##
##
## $V24
##
## Pearson's product-moment correlation
##
## data: x and y
## t = 7.322e-16, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## 5.190433e-17
##
##
## $V25
##
## Pearson's product-moment correlation
##
## data: x and y
## t = 2.4125e-15, df = 199, p-value = 1
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1383947 0.1383947
## sample estimates:
## cor
## 1.710192e-16
##
##
## $V26
##
## Pearson's product-moment correlation
##
## data: x and y
## t = 2.592, df = 199, p-value = 0.01025
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.04340509 0.31132282
## sample estimates:
## cor
## 0.1807143
# OLS prerequisite: the data must have more observations (rows) than
# variables (columns)
ncol(data) < nrow(data)
## [1] TRUE
# Variability of X
# Return the variance of a single column.
#
# The value is returned instead of being print()-ed: when this function is
# mapped over a data set, printing inside it emits every variance twice
# (once as a side effect and once more in the mapped result).
#
# X: a vector accepted by var()
# Returns: the variance of X
checkVariate <- function(X) {
  var(X)
}
# Run the variance check on every column; the result is a list with one
# entry per column name
lapply(X = data, FUN = checkVariate)
## [1] 0.06298109
## [1] 0.02806283
## [1] 0.01477612
## [1] 0.02910448
## [1] 0.03840796
## [1] 0.01477612
## [1] 0.04298507
## [1] 0.06079602
## [1] 0.009900498
## [1] 0.01477612
## [1] 0.07781095
## [1] 0.03840796
## [1] 0.004975124
## [1] 0.06079602
## [1] 0.0819403
## [1] 0.05199005
## [1] 0.03378109
## [1] 0.01960199
## [1] 0.009900498
## [1] 0.02910448
## [1] 0.05641791
## [1] 0.1345274
## [1] 0.05641791
## [1] 0.05199005
## [1] 0.09004975
## [1] 0.09004975
## [1] 0.1477612
## [1] 0.1477612
## [1] 0.2460199
## [1] 0.2460199
## [1] 0.02910448
## [1] 0.03840796
## [1] 0.2249751
## [1] 0.250199
## [1] 0.1094527
## [1] 0.03840796
## [1] 0.2436318
## [1] 0.2350746
## [1] 0.01477612
## [1] 0.01477612
## [1] 0.03128016
## [1] 0.03382401
## [1] 0.03226079
## [1] 0.04160995
## [1] 0.0402637
## [1] 0.05641791
## [1] 0.05641791
## [1] 0.20199
## [1] 0.06940299
## [1] 0.06079602
## [1] 0.01960199
## [1] 0.01960199
## [1] 0.04751244
## [1] 0.1718408
## [1] 0.1056716
## [1] 0.004975124
## [1] 0.004975124
## [1] 0.01960199
## [1] 0.02458013
## [1] 0.05199005
## [1] 0.2181095
## [1] 0.01477612
## [1] 0.09004975
## [1] 0.004975124
## [1] 0.2494527
## [1] 0.04298507
## [1] 0.004975124
## [1] 0.03666454
## [1] 0.0226499
## [1] 0.06265527
## [1] 0.03048712
## [1] 0.03808283
## [1] 0.03183469
## [1] 0.03216501
## [1] 0.03892168
## $V1
## [1] 0.06298109
##
## $V2
## [1] 0.02806283
##
## $V3alfa.romero
## [1] 0.01477612
##
## $V3audi
## [1] 0.02910448
##
## $V3bmw
## [1] 0.03840796
##
## $V3chevrolet
## [1] 0.01477612
##
## $V3dodge
## [1] 0.04298507
##
## $V3honda
## [1] 0.06079602
##
## $V3isuzu
## [1] 0.009900498
##
## $V3jaguar
## [1] 0.01477612
##
## $V3mazda
## [1] 0.07781095
##
## $V3mercedes.benz
## [1] 0.03840796
##
## $V3mercury
## [1] 0.004975124
##
## $V3mitsubishi
## [1] 0.06079602
##
## $V3nissan
## [1] 0.0819403
##
## $V3peugot
## [1] 0.05199005
##
## $V3plymouth
## [1] 0.03378109
##
## $V3porsche
## [1] 0.01960199
##
## $V3renault
## [1] 0.009900498
##
## $V3saab
## [1] 0.02910448
##
## $V3subaru
## [1] 0.05641791
##
## $V3toyota
## [1] 0.1345274
##
## $V3volkswagen
## [1] 0.05641791
##
## $V3volvo
## [1] 0.05199005
##
## $V4diesel
## [1] 0.09004975
##
## $V4gas
## [1] 0.09004975
##
## $V5std
## [1] 0.1477612
##
## $V5turbo
## [1] 0.1477612
##
## $V6four
## [1] 0.2460199
##
## $V6two
## [1] 0.2460199
##
## $V7convertible
## [1] 0.02910448
##
## $V7hardtop
## [1] 0.03840796
##
## $V7hatchback
## [1] 0.2249751
##
## $V7sedan
## [1] 0.250199
##
## $V7wagon
## [1] 0.1094527
##
## $V84wd
## [1] 0.03840796
##
## $V8fwd
## [1] 0.2436318
##
## $V8rwd
## [1] 0.2350746
##
## $V9front
## [1] 0.01477612
##
## $V9rear
## [1] 0.01477612
##
## $V10
## [1] 0.03128016
##
## $V11
## [1] 0.03382401
##
## $V12
## [1] 0.03226079
##
## $V13
## [1] 0.04160995
##
## $V14
## [1] 0.0402637
##
## $V15dohc
## [1] 0.05641791
##
## $V15l
## [1] 0.05641791
##
## $V15ohc
## [1] 0.20199
##
## $V15ohcf
## [1] 0.06940299
##
## $V15ohcv
## [1] 0.06079602
##
## $V15rotor
## [1] 0.01960199
##
## $V16eight
## [1] 0.01960199
##
## $V16five
## [1] 0.04751244
##
## $V16four
## [1] 0.1718408
##
## $V16six
## [1] 0.1056716
##
## $V16three
## [1] 0.004975124
##
## $V16twelve
## [1] 0.004975124
##
## $V16two
## [1] 0.01960199
##
## $V17
## [1] 0.02458013
##
## $V181bbl
## [1] 0.05199005
##
## $V182bbl
## [1] 0.2181095
##
## $V184bbl
## [1] 0.01477612
##
## $V18idi
## [1] 0.09004975
##
## $V18mfi
## [1] 0.004975124
##
## $V18mpfi
## [1] 0.2494527
##
## $V18spdi
## [1] 0.04298507
##
## $V18spfi
## [1] 0.004975124
##
## $V19
## [1] 0.03666454
##
## $V20
## [1] 0.0226499
##
## $V21
## [1] 0.06265527
##
## $V22
## [1] 0.03048712
##
## $V23
## [1] 0.03808283
##
## $V24
## [1] 0.03183469
##
## $V25
## [1] 0.03216501
##
## $V26
## [1] 0.03892168
# Scatter-plot every column except the response against V26 to inspect the
# direction of each relationship (author's note: an inverse relationship
# does not hold true here)
predictors <- names(data)[names(data) != "V26"]
for (predictor in predictors) {
  plot(data[[predictor]], data$V26, xlab = predictor)
}


















# No perfect multicollinearity
## Removing linearly dependent variables
# alias() lists the terms that are exact linear combinations of other terms;
# the row names of its `Complete` matrix are those terms.
ld.vars <- rownames(alias(mod)$Complete)
# deparse() may split a long formula across several strings, so collapse it
# back into one before editing the text.
formula.text <- paste(deparse(formula), collapse = "")
# Append "-term" for each aliased term. Skip this when nothing is aliased:
# otherwise the text would end in a dangling "-" and as.formula() would fail.
if (length(ld.vars) > 0) {
  formula.text <- paste(
    formula.text,
    paste(ld.vars, collapse = "-"),
    sep = "-"
  )
}
formula.new <- as.formula(formula.text)
# Refit without the aliased terms so that variance inflation factors can be
# computed.
mod.new <- lm(formula.new, data = data)
vif(mod.new)
## V1 V2 V3alfa.romero V3audi V3bmw
## 7.138638 3.864447 4.773155 9.175904 4.241116
## V3chevrolet V3dodge V3honda V3isuzu V3jaguar
## 3.233614 7.251909 15.657402 3.455044 8.061857
## V3mazda V3mercedes.benz V3mercury V3mitsubishi V3nissan
## 8.676386 10.079353 1.481333 11.731590 10.344755
## V3peugot V3plymouth V3porsche V3renault V3saab
## 118.517539 5.596565 6.937066 2.894422 6.748543
## V3subaru V3toyota V3volkswagen V4diesel V5std
## 132.115445 12.241818 9.410616 268.394689 6.483269
## V6four V7convertible V7hardtop V7hatchback V7sedan
## 4.202776 3.218629 3.159356 8.725028 4.896685
## V84wd V8fwd V9front V10 V11
## 3.977805 12.440471 40.396649 20.837847 25.637596
## V12 V13 V14 V15dohc V15l
## 15.160869 8.596078 49.859361 123.764751 42.910476
## V15ohc V15ohcv V16eight V16five V16four
## 442.762842 124.335120 34.406452 94.452505 319.646621
## V16six V17 V181bbl V182bbl V184bbl
## 140.385513 73.753565 32.233720 100.379126 11.380250
## V18mfi V18mpfi V18spdi V19 V20
## 3.581270 119.150014 22.948792 16.120930 6.435678
## V21 V22 V23 V24 V25
## 254.869269 56.305567 6.367230 48.965646 40.455029
# Split the whole dataset into training (80%) and testing (20%).
# Fix the RNG seed so that the split, and every number derived from it, can
# be reproduced on a re-run.
# NOTE(review): drop this line if a seed is already set earlier in the script.
set.seed(123)
n_obs <- nrow(data)
# floor() makes the training-set size an explicit whole number instead of
# passing a fractional size to sample()
dt <- sort(sample(n_obs, floor(0.8 * n_obs)))
train <- data[dt, ]
test <- data[-dt, ]
# Prediction Model: regress V26 on every other column of the training rows
model <- lm(V26 ~ ., data = train)
# Residual standard error of the fit
sigma(model)
## [1] 0.04507635
# Coefficient table (estimate, std. error, t value, p-value). The element
# name is spelled out instead of relying on `$` partial matching of "coef".
summary(model)$coefficients
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.361188953 0.23195268 1.55716652 1.225590e-01
## V1 -0.047329156 0.03676030 -1.28750731 2.008600e-01
## V2 -0.083821734 0.04098680 -2.04509072 4.344634e-02
## V3alfa.romero 0.127258273 0.08107208 1.56969302 1.196142e-01
## V3audi 0.135776356 0.07397406 1.83545893 6.938023e-02
## V3bmw 0.219476649 0.03707762 5.91938300 4.467396e-08
## V3chevrolet -0.071602310 0.05095249 -1.40527602 1.630077e-01
## V3dodge -0.060908957 0.04714493 -1.29195149 1.993224e-01
## V3honda -0.002491113 0.05688088 -0.04379527 9.651541e-01
## V3isuzu -0.043432734 0.06342372 -0.68480273 4.950369e-01
## V3jaguar 0.034328863 0.08980010 0.38228088 7.030563e-01
## V3mazda 0.027001642 0.03950180 0.68355477 4.958215e-01
## V3mercedes.benz 0.145378089 0.07020439 2.07078344 4.092919e-02
## V3mercury -0.026055167 0.05926235 -0.43965805 6.611241e-01
## V3mitsubishi -0.069437767 0.04930604 -1.40830129 1.621127e-01
## V3nissan 0.014931455 0.04057753 0.36797349 7.136630e-01
## V3peugot -0.079724725 0.16757049 -0.47576829 6.352669e-01
## V3plymouth -0.068973488 0.04611739 -1.49560682 1.378743e-01
## V3porsche 0.188480466 0.06535008 2.88416579 4.797157e-03
## V3renault -0.006484061 0.06028791 -0.10755160 9.145647e-01
## V3saab 0.155462343 0.05581286 2.78542142 6.384936e-03
## V3subaru -0.013736632 0.18293379 -0.07509073 9.402911e-01
## V3toyota -0.014356892 0.03499590 -0.41024496 6.824952e-01
## V3volkswagen 0.059700481 0.04636083 1.28773538 2.007809e-01
## V4diesel 0.447680922 0.21075227 2.12420447 3.609446e-02
## V5std -0.035833078 0.02798165 -1.28059203 2.032700e-01
## V6four -0.006870488 0.01520855 -0.45175172 6.524165e-01
## V7convertible 0.016392414 0.04320655 0.37939651 7.051899e-01
## V7hardtop 0.018350367 0.03103873 0.59120870 5.557014e-01
## V7hatchback -0.003529723 0.02182651 -0.16171723 8.718516e-01
## V7sedan 0.011956335 0.01534366 0.77923602 4.376627e-01
## V84wd -0.014313281 0.03607135 -0.39680473 6.923487e-01
## V8fwd -0.029865540 0.02615983 -1.14165650 2.562963e-01
## V9front -0.224342160 0.20093500 -1.11649123 2.668610e-01
## V10 0.202265978 0.09485043 2.13247287 3.539212e-02
## V11 -0.262223227 0.10354161 -2.53253949 1.286345e-02
## V12 0.157153441 0.07727497 2.03369133 4.460499e-02
## V13 -0.117531515 0.04955527 -2.37172578 1.960066e-02
## V14 0.583981926 0.17668475 3.30521981 1.314323e-03
## V15dohc -0.036827509 0.17224457 -0.21380940 8.311267e-01
## V15ohc -0.008441960 0.17153418 -0.04921445 9.608456e-01
## V15ohcv -0.098010445 0.16943676 -0.57844853 5.642490e-01
## V16eight 0.063418057 0.15326527 0.41377971 6.799128e-01
## V16five -0.179221318 0.16505608 -1.08582077 2.801431e-01
## V16four -0.112439779 0.16044220 -0.70081173 4.850319e-01
## V16six -0.084898417 0.13851040 -0.61293893 5.412945e-01
## V17 0.380803905 0.22848109 1.66667577 9.867720e-02
## V181bbl -0.049562089 0.08195102 -0.60477694 5.466835e-01
## V182bbl 0.016334599 0.07093102 0.23028852 8.183334e-01
## V184bbl -0.064193207 0.09034539 -0.71053104 4.790124e-01
## V18mpfi -0.016976116 0.07249950 -0.23415492 8.153387e-01
## V18spdi -0.048801944 0.07926582 -0.61567448 5.394943e-01
## V19 -0.091072685 0.08062927 -1.12952389 2.613524e-01
## V20 -0.045396574 0.07429543 -0.61102779 5.425539e-01
## V21 -0.586528960 0.25156487 -2.33152174 2.171197e-02
## V22 -0.007129480 0.19186019 -0.03715976 9.704310e-01
## V23 0.163240164 0.04799095 3.40147793 9.614494e-04
## V24 0.013405162 0.14546264 0.09215536 9.267571e-01
## V25 0.154658012 0.14688694 1.05290513 2.948969e-01
# Score the held-out rows. The fit is rank deficient (summary(model) reports
# coefficients "not defined because of singularities"), and predict() is
# expected to warn about that. Silence only this one call rather than
# changing the global `warn` option for the rest of the session.
pred <- suppressWarnings(predict(model, newdata = test))
summary(model)
##
## Call:
## lm(formula = V26 ~ ., data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.09048 -0.02366 0.00000 0.01935 0.17254
##
## Coefficients: (16 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.361189 0.231953 1.557 0.122559
## V1 -0.047329 0.036760 -1.288 0.200860
## V2 -0.083822 0.040987 -2.045 0.043446 *
## V3alfa.romero 0.127258 0.081072 1.570 0.119614
## V3audi 0.135776 0.073974 1.835 0.069380 .
## V3bmw 0.219477 0.037078 5.919 4.47e-08 ***
## V3chevrolet -0.071602 0.050952 -1.405 0.163008
## V3dodge -0.060909 0.047145 -1.292 0.199322
## V3honda -0.002491 0.056881 -0.044 0.965154
## V3isuzu -0.043433 0.063424 -0.685 0.495037
## V3jaguar 0.034329 0.089800 0.382 0.703056
## V3mazda 0.027002 0.039502 0.684 0.495822
## V3mercedes.benz 0.145378 0.070204 2.071 0.040929 *
## V3mercury -0.026055 0.059262 -0.440 0.661124
## V3mitsubishi -0.069438 0.049306 -1.408 0.162113
## V3nissan 0.014931 0.040578 0.368 0.713663
## V3peugot -0.079725 0.167570 -0.476 0.635267
## V3plymouth -0.068973 0.046117 -1.496 0.137874
## V3porsche 0.188480 0.065350 2.884 0.004797 **
## V3renault -0.006484 0.060288 -0.108 0.914565
## V3saab 0.155462 0.055813 2.785 0.006385 **
## V3subaru -0.013737 0.182934 -0.075 0.940291
## V3toyota -0.014357 0.034996 -0.410 0.682495
## V3volkswagen 0.059700 0.046361 1.288 0.200781
## V3volvo NA NA NA NA
## V4diesel 0.447681 0.210752 2.124 0.036094 *
## V4gas NA NA NA NA
## V5std -0.035833 0.027982 -1.281 0.203270
## V5turbo NA NA NA NA
## V6four -0.006870 0.015209 -0.452 0.652416
## V6two NA NA NA NA
## V7convertible 0.016392 0.043207 0.379 0.705190
## V7hardtop 0.018350 0.031039 0.591 0.555701
## V7hatchback -0.003530 0.021827 -0.162 0.871852
## V7sedan 0.011956 0.015344 0.779 0.437663
## V7wagon NA NA NA NA
## V84wd -0.014313 0.036071 -0.397 0.692349
## V8fwd -0.029866 0.026160 -1.142 0.256296
## V8rwd NA NA NA NA
## V9front -0.224342 0.200935 -1.116 0.266861
## V9rear NA NA NA NA
## V10 0.202266 0.094850 2.132 0.035392 *
## V11 -0.262223 0.103542 -2.533 0.012863 *
## V12 0.157153 0.077275 2.034 0.044605 *
## V13 -0.117532 0.049555 -2.372 0.019601 *
## V14 0.583982 0.176685 3.305 0.001314 **
## V15dohc -0.036828 0.172245 -0.214 0.831127
## V15l NA NA NA NA
## V15ohc -0.008442 0.171534 -0.049 0.960846
## V15ohcf NA NA NA NA
## V15ohcv -0.098010 0.169437 -0.578 0.564249
## V15rotor NA NA NA NA
## V16eight 0.063418 0.153265 0.414 0.679913
## V16five -0.179221 0.165056 -1.086 0.280143
## V16four -0.112440 0.160442 -0.701 0.485032
## V16six -0.084898 0.138510 -0.613 0.541294
## V16three NA NA NA NA
## V16twelve NA NA NA NA
## V16two NA NA NA NA
## V17 0.380804 0.228481 1.667 0.098677 .
## V181bbl -0.049562 0.081951 -0.605 0.546684
## V182bbl 0.016335 0.070931 0.230 0.818333
## V184bbl -0.064193 0.090345 -0.711 0.479012
## V18idi NA NA NA NA
## V18mfi NA NA NA NA
## V18mpfi -0.016976 0.072500 -0.234 0.815339
## V18spdi -0.048802 0.079266 -0.616 0.539494
## V18spfi NA NA NA NA
## V19 -0.091073 0.080629 -1.130 0.261352
## V20 -0.045397 0.074295 -0.611 0.542554
## V21 -0.586529 0.251565 -2.332 0.021712 *
## V22 -0.007129 0.191860 -0.037 0.970431
## V23 0.163240 0.047991 3.401 0.000961 ***
## V24 0.013405 0.145463 0.092 0.926757
## V25 0.154658 0.146887 1.053 0.294897
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.04508 on 101 degrees of freedom
## Multiple R-squared: 0.9677, Adjusted R-squared: 0.9491
## F-statistic: 52.13 on 58 and 101 DF, p-value: < 2.2e-16
# Pair the held-out responses with the model's predictions
actual <- test[["V26"]]
predicted <- pred
# Span of the response, as context for the size of the errors below
range(data[["V26"]])
## [1] 0 1
# Root-mean-squared error, then mean absolute error, on the test rows
rmse(actual, predicted)
## [1] 0.04676572
mae(actual, predicted)
## [1] 0.03903871
